

* Root paths used by every cd below. projectdir is empty as written, so
* datadir expands to "/data".
global projectdir ""
global datadir "$projectdir/data"

* Combine the ECF T26 and T13 extracts into one file keyed on sein/year/quarter.
cd $datadir/raw_pulls/lehd_usiris

* T26 extract: load only the firm id, EIN, and firm-age columns.
use sein year quarter firmid fas_ein fas_ein_flag firmage source_age firmage_flag ///
    using ecf_interleave_sein_t26_2018_usiris_2018q4, clear
tempfile ecf_sein
save `ecf_sein', replace

* T13 extract: load qtime, state, NAICS mode, and firm birth columns, join the
* T26 columns one-to-one, and overwrite the tempfile with the combined result.
use sein year quarter qtime es_state mode_naics2017fnl_emp mode_naics2012fnl_emp ///
    mode_naics2007fnl_emp firm_birth_year firm_birth_quarter ///
    using ecf_interleave_sein_t13_2018_usiris_2018q4, clear
merge 1:1 sein year quarter using `ecf_sein', nogenerate
save `ecf_sein', replace


* Stack the PHF and OPM job records, then attach the ECF columns by sein/qtime.
cd $datadir/intermediate_files

* PHF records: rename seinunit1 to seinunit and tag the rows with their origin.
use phf_interleave_b_2018_usiris_2018q4_LONG, clear
rename seinunit1 seinunit
gen source = "PHF"
tempfile phf
save `phf', replace

* OPM records: tag the origin, set spell_u2w to 1 on every OPM row, then
* append the PHF rows.
use opm_us_ehf_phf_2014_usiris_2018q4_LONG, clear
gen source = "OPM"
gen spell_u2w = 1
append using `phf'

*sort pik qtime sein seinunit
* Every job record is kept; ECF rows with no job record are discarded.
* Unmatched job records: No year/quarter info (but have qtime) -- add at end.
merge m:1 sein qtime using `ecf_sein', keep(master match) nogenerate
order pik sein spell_u2w wage
sort pik sein spell_u2w
compress
save iris_lehd_jobhistory_2018q4, replace

* These are imputations at the SEIN-SEINUNIT level.
* This step works on the job-history data currently in memory and attaches the
* columns of sein_seinunit_qtime_naics_imp by sein/seinunit/qtime.
cd $datadir/intermediate_files
merge m:1 sein seinunit qtime using sein_seinunit_qtime_naics_imp
* The missing option keeps rows with an empty source in the table. Rows that
* exist only in the imputation file have no source value and would otherwise
* be left out of the tabulation entirely.
tab _merge source, missing
* Expected: all non-matches are due to OPM data.
* Rows found only in the imputation file are not job records (they carry no
* pik), so drop them, as the other merges in this file do.
drop if _merge == 2
drop _merge
order pik sein spell_u2w wage
sort pik sein spell_u2w
compress
save iris_lehd_jobhistory_2018q4, replace


************************************************************
* IPEDS EINs
* Build a de-duplicated list of EINs from organize_ipeds_ein, then set
* univ_ein = 1 on job records whose EIN (parsed from fas_ein) is in that list.
cd $datadir/intermediate_files
use organize_ipeds_ein, clear
keep ein
* A blank EIN in the list would match every job record whose fas_ein lacks the
* EINUS prefix (ein is missing on those rows) and flag them all as univ_ein.
drop if missing(ein)
duplicates drop
tempfile univeins
save `univeins', replace

use iris_lehd_jobhistory_2018q4, clear
* ein is the text after the "EINUS" prefix of fas_ein; it stays missing when
* fas_ein does not start with that prefix.
gen ein = regexs(1) if regexm(fas_ein, "^EINUS(.*)")
merge m:1 ein using `univeins'
* List entries with no job record are not needed.
drop if _merge == 2
gen univ_ein = (_merge == 3)
drop _merge
compress
save iris_lehd_jobhistory_2018q4, replace
************************************************************


**************************************************************************************
**************************************************************************************

* Clean up firmid
* Creates firmid_edit and firmid_edit_source, a label recording which rule
* applied to each record.

cd $datadir/intermediate_files
use iris_lehd_jobhistory_2018q4, clear

* Rule 1: take firmid as is; label the record "firmid" when it is non-missing.
gen firmid_edit_source = cond(missing(firmid), "", "firmid")
gen firmid_edit = firmid

* Rule 2: where firmid_edit is still missing and fas_ein starts with EINUS,
* use "0" followed by the text after that prefix. The label is written first
* because both statements test missing(firmid_edit).
replace firmid_edit_source = "ein" if regexm(fas_ein, "^EINUS(.*)") & missing(firmid_edit)
replace firmid_edit = "0" + regexs(1) if regexm(fas_ein, "^EINUS(.*)") & missing(firmid_edit)

* Rule 3: records still unlabeled are tagged by origin -- "OPM" for OPM rows,
* "State EIN" for everything else. firmid_edit is not changed here.
replace firmid_edit_source = cond(source == "OPM", "OPM", "State EIN") if missing(firmid_edit_source)

compress
save iris_lehd_jobhistory_2018q4, replace

**************************************************************************************
**************************************************************************************

**************************************************************************************
**************************************************************************************
* The year and quarter variables came from ECF, but there are some non-matches with PHF;
*   replace year/quarter on every record with the values from the qtime lookup here.
cd $datadir
import excel "interleave_quarter_lookup.xlsx", clear firstrow
rename (quarter_lehd year quarter) (qtime year_lookup quarter_lookup)

* Lookup rows with no job record are discarded; every job record is kept.
cd $datadir/intermediate_files
merge 1:m qtime using iris_lehd_jobhistory_2018q4, keep(match using) nogenerate

* Diagnostics only: agreement between the ECF values and the lookup values,
* tabulated over records that have an ECF value.
gen year_match = (year == year_lookup)
gen quarter_match = (quarter == quarter_lookup)
gen year_quarter_match = (year_match & quarter_match)
tab year_match if !missing(year)
tab quarter_match if !missing(quarter)
tab year_quarter_match if !missing(year)

* Discard the diagnostics and the ECF year/quarter; the lookup values take their names.
drop year_match quarter_match year_quarter_match year quarter
rename (year_lookup quarter_lookup) (year quarter)

order pik sein spell_u2w wage
sort pik sein spell_u2w
compress
save iris_lehd_jobhistory_2018q4, replace

**************************************************************************************
**************************************************************************************

